---
title: "Descriptives and Correlation Tables"
format: html
---

```{r}
library(lubridate)
library(ggplot2)
library(arrow)
library(here)
library(ggbeeswarm)
library(tidyr)
library(dplyr)
library(stringr)
library(ggcorrplot)
library(reshape2)
library(data.table)
library(lme4)
library(jtools)
library(kableExtra)
library(countrycode)
library(readr)
```

## Density Plots 

```{r}
# File-name identifiers of the scoring methods whose score tables are loaded.
methods <- c("ccr", "emfd", "ddr", "mfd2", "mfd", "moralbert")
# Display names of the methods, in the order used for facets and axes.
methods_pretty_names <- c("MFD", "MFD2", "eMFD", "DDR", "CCR", "MoralBERT")
# Display order of the foundations. Defined locally because `dimensions` is
# only assigned in later chunks (and is lower-case in the regression chunk),
# so relying on it here fails on a fresh render or yields all-NA factors.
foundation_levels <- c("Care", "Fairness", "Loyalty", "Authority", "Sanctity")

# Read one score table per method and tag every row with its method id.
full_data <- do.call(rbind, lapply(methods, function(method_name) {
    method_scores <- read_feather(here(
        "data", "for_graphs", "same_method",
        paste0("data_", method_name, ".feather")
    ))
    method_scores$method <- method_name
    method_scores
}))

# One row per text x foundation x pole; column names such as
# "authority.virtue" are split at the (literal) dot.
# NOTE(review): the column range assumes every score column lies between
# `authority.virtue` and `authority.vice` in the stored tables - confirm.
long_full_data <- full_data |>
    pivot_longer(
        cols = authority.virtue:authority.vice,
        names_to = c("foundation", "pole"),
        names_pattern = "([a-z]*)\\.([a-z]*)",
        values_to = "score"
    ) |>
    mutate(
        foundation = factor(str_to_title(foundation), levels = foundation_levels),
        pole = str_to_title(pole),
        method = case_match(
            method,
            "mfd2" ~ "MFD2",
            "mfd" ~ "MFD",
            "ccr" ~ "CCR",
            "emfd" ~ "eMFD",
            "ddr" ~ "DDR",
            "moralbert" ~ "MoralBERT"
        ),
        method = factor(method, levels = methods_pretty_names)
    )

# Score distributions per foundation, one facet per method. No labeller is
# needed because `method` already carries the display names.
density_by_method <- ggplot(
    long_full_data,
    aes(x = foundation, y = score, fill = pole)
) +
    geom_violin(scale = "area") +
    labs(
        x = "Foundation",
        y = "Morality Score",
        fill = "Pole"
    ) +
    facet_wrap(~method, ncol = 2)
density_by_method
ggsave(here("graphs", "density", "density_all_normalised.png"),
    plot = density_by_method,
    width = 9, height = 8, dpi = 600,
    type = "cairo"
)

# Alternate visualisation: one facet per foundation, methods on the x axis.
density_by_foundation <- ggplot(
    long_full_data,
    aes(x = method, y = score, fill = pole)
) +
    geom_violin(scale = "area") +
    labs(
        x = "Method",
        y = "Morality Score",
        fill = "Pole"
    ) +
    facet_wrap(~foundation, ncol = 1)
density_by_foundation
ggsave(here("graphs", "density", "density_alt_all_normalised.png"),
    plot = density_by_foundation,
    width = 9, height = 11, dpi = 600,
    type = "cairo"
)
```

Statistics
```{r}
# Scores with presentation-ready column names, shared by both summaries below.
descriptive_input <- long_full_data |>
    rename(
        Pole = pole,
        Foundation = foundation,
        Method = method
    )

# Mean, standard deviation and coefficient of variation of `score` for each
# group of an already grouped table; the result is returned ungrouped.
summarise_scores <- function(grouped_scores) {
    grouped_scores |>
        summarize(
            "Mean" = mean(score),
            "SD" = sd(score),
            "Coefficient of Variation" = sd(score) / mean(score),
            .groups = "drop"
        )
}

# LaTeX table of the descriptives per method, pole and foundation.
# NOTE(review): knitr::kable() normally prepends "tab:" to `label` itself, so
# this label probably renders as "tab:tab:descriptives_mean_sd" - confirm
# against the references used in the manuscript before changing it.
descriptive_input |>
    group_by(Method, Pole, Foundation) |>
    summarise_scores() |>
    kable(
        format = "latex", digits = 2, booktabs = TRUE, longtable = TRUE,
        caption = "Descriptives of different measurement instruments (not normalised)", label = "tab:descriptives_mean_sd"
    ) |>
    collapse_rows(columns = 1:3) |>
    kable_styling(latex_options = c("repeat_header")) |>
    save_kable(here("graphs", "tables", "scoring_descriptive.tex"))

# Overall descriptives per method (printed in the rendered document).
descriptive_input |>
    group_by(Method) |>
    summarise_scores()
```

## Correlations same method 
```{r}
# Method ids, poles and foundations used by the correlation chunks below.
methods <- c("mfd", "mfd2", "ccr", "emfd", "ddr", "moralbert")
poles <- c("virtue", "vice")
dimensions <- c("Care", "Fairness", "Loyalty", "Authority", "Sanctity")
correlation_measurements <- c("kendall", "spearman")
# Axis order for the correlation matrices: foundations in the order that is
# typical in the literature, all virtue labels first, then all vice labels,
# e.g. "Care (Virtue)", ..., "Sanctity (Virtue)", "Care (Vice)", ...
levels_for_corr <- paste0(
    rep(dimensions, times = length(poles)),
    " (",
    rep(stringr::str_to_title(poles), each = length(dimensions)),
    ")"
)

# Map p-values to the conventional significance markers.
#
# value_to_test: one or more p-values, numeric or coercible with as.numeric()
#                (e.g. character representations of numbers).
# Returns a character vector of the same length:
#   "***" for p < 0.001, "**" for p < 0.01, "*" for p < 0.05, "" otherwise.
# Missing or non-numeric values yield "" instead of aborting the caller.
get_significant_stars <- function(value_to_test) {
    p_value <- as.numeric(value_to_test)
    # findInterval() counts how many thresholds are <= p, so a value exactly
    # on a threshold falls into the weaker category (strict "<" comparisons).
    stars <- c("***", "**", "*", "")[findInterval(p_value, c(0.001, 0.01, 0.05)) + 1L]
    stars[is.na(stars)] <- ""
    return(stars)
}


# Convert raw names such as "care.virtue" into display labels such as
# "Care (Virtue)". Only the first two dot-separated parts are used.
format_foundation_label <- function(raw_names) {
    name_parts <- str_split_fixed(as.character(raw_names), "\\.", n = 3)
    sprintf("%s (%s)", str_to_title(name_parts[, 1]), str_to_title(name_parts[, 2]))
}

# Reshape the stacked per-method matrices into long format.
# (Despite its name the function goes from wide to long; the name is kept so
# that existing references keep working.)
#
# one_df: table with an `index` column (row variable), one column per
#         column variable, and a `method` column.
# Returns a data frame with the columns index, variable, value and method,
# where index and variable hold display labels such as "Care (Virtue)".
transform_long_to_wide <- function(one_df) {
    value_columns <- setdiff(colnames(one_df), "method")
    per_method <- lapply(methods, function(method) {
        # Filter on the table's own `method` column rather than on a global.
        melted_cormat <- reshape2::melt(
            as.data.frame(one_df[one_df$method == method, value_columns, drop = FALSE]),
            id.vars = "index"
        )
        melted_cormat$variable <- format_foundation_label(melted_cormat$variable)
        melted_cormat$index <- format_foundation_label(melted_cormat$index)
        melted_cormat$method <- method
        melted_cormat
    })
    do.call(rbind, per_method)
}

for (correlation_measure in correlation_measurements) {
    # Correlation matrices, one per method, stacked and tagged with the method.
    full_data <- do.call(rbind, lapply(methods, function(method_name) {
        correlation_table <- read_feather(here("data", "for_graphs", "same_method", paste0(
            "correlation_", method_name, "_", correlation_measure,
            ".feather"
        )))
        correlation_table$method <- method_name
        correlation_table
    }))
    # Matching p-value matrices.
    # NOTE(review): the file name contains a double underscore after
    # "pvalues" - confirm that this matches the exported files.
    full_data_pv <- do.call(rbind, lapply(methods, function(method_name) {
        pvalue_table <- read_feather(here("data", "for_graphs", "same_method", paste0("correlation_", method_name, "_pvalues__", correlation_measure, ".feather")))
        pvalue_table$method <- method_name
        pvalue_table
    }))

    # Replace every p-value by its significance stars; the self-correlations
    # on the diagonal get no marker and are never passed to the star helper.
    full_data_pv <- as.data.frame(full_data_pv)
    p_value_columns <- setdiff(colnames(full_data_pv), c("index", "method"))
    for (column_name in p_value_columns) {
        off_diagonal <- full_data_pv[["index"]] != column_name
        stars <- rep("", nrow(full_data_pv))
        stars[off_diagonal] <- vapply(
            full_data_pv[[column_name]][off_diagonal],
            get_significant_stars,
            character(1),
            USE.NAMES = FALSE
        )
        full_data_pv[[column_name]] <- stars
    }

    all_correlations <- transform_long_to_wide(full_data)
    all_correlations_pv <- transform_long_to_wide(full_data_pv)
    # Attach the stars to the coefficients and build the tile text.
    display_correlations <- all_correlations_pv |>
        rename(significance = value) |>
        right_join(all_correlations, by = c("index", "variable", "method")) |>
        mutate(
            display_text = paste0(round(value, digits = 1), " ", significance),
            index = factor(index, levels = levels_for_corr),
            variable = factor(variable, levels = levels_for_corr),
            method = case_match(
                method,
                "mfd2" ~ "MFD2",
                "mfd" ~ "MFD",
                "ccr" ~ "CCR",
                "emfd" ~ "eMFD",
                "ddr" ~ "DDR",
                "moralbert" ~ "MoralBERT"
            ),
            method = factor(method, levels = methods_pretty_names)
        )

    # The y axis is reversed so that the diagonal runs from top-left to
    # bottom-right. `method` already holds the display names, so the facets
    # need no labeller (a lookup keyed on the raw ids would not match them).
    corr_plot <- ggplot(data = display_correlations, aes(
        x = factor(index), y = factor(reorder(variable, desc(variable))),
        fill = value
    )) +
        geom_tile(color = "white") +
        scale_fill_gradient2(
            midpoint = 0, limits = c(-1, 1), space = "Lab",
            name = "Correlation"
        ) +
        labs(
            x = "", y = ""
        ) +
        theme(axis.text.x = element_text(
            angle = 45, vjust = 1,
            hjust = 1
        )) +
        coord_fixed() +
        facet_wrap(~method, ncol = 2) +
        geom_text(aes(label = display_text), size = 2.8)

    ggsave(here("graphs", "same_method", paste0("correlation_all_", correlation_measure, ".pdf")),
        plot = corr_plot, width = 11, height = 14
    )
}

```

## Correlations of same category 

```{r}
poles <- c("virtue", "vice")

# Translate the method ids used in the same-category files into display names;
# ids that are not listed become NA.
recode_method_label <- function(raw_method) {
    case_match(
        as.character(raw_method),
        "mfd" ~ "MFD",
        "mfd2" ~ "MFD2",
        "emfd" ~ "eMFD",
        "ddr_all_en" ~ "DDR",
        "ccr_en_to_en" ~ "CCR",
        "moralbert" ~ "MoralBERT"
    )
}

for (correlation_measure in correlation_measurements) {
    # Mean cross-method correlation matrices, one per pole, stacked.
    full_data <- do.call(rbind, lapply(poles, function(pole) {
        pole_table <- read_feather(here("data", "for_graphs", "same_category", paste0(
            "mean_correlation_", pole, "_", correlation_measure,
            ".feather"
        )))
        pole_table$pole <- pole
        pole_table
    }))
    # Long format: one row per pair of methods and pole.
    correlations <- reshape2::melt(as.data.frame(full_data), id.vars = c("index", "pole")) |>
        mutate(
            index = recode_method_label(index),
            variable = recode_method_label(variable),
            pole = str_to_title(pole)
        ) |>
        mutate(
            index = factor(index, levels = rev(methods_pretty_names), ordered = TRUE),
            variable = factor(variable, levels = rev(methods_pretty_names), ordered = TRUE)
        )

    corplot <- ggplot(data = correlations, aes(index, variable,
        fill = value
    )) +
        geom_tile(color = "white") +
        scale_fill_gradient2(
            midpoint = 0, limits = c(-1, 1), space = "Lab",
            name = "Correlation"
        ) +
        labs(
            x = "", y = ""
        ) +
        theme(axis.text.x = element_text(
            angle = 45, vjust = 1,
            hjust = 1
        )) +
        coord_fixed() +
        facet_wrap(~pole) +
        geom_text(aes(index, variable, label = round(value, digits = 2)), size = 3.5) +
        # drop = FALSE keeps every factor level on the axes, in level order.
        scale_x_discrete(drop = FALSE) +
        scale_y_discrete(drop = FALSE)

    ggsave(here("graphs", "same_category", paste0("same_category_all_", correlation_measure, ".pdf")),
        plot = corplot,
        width = 12, height = 12
    )
}
```

## Correlations of multilingual data
```{r}
methods <- c("mfd", "ccr_multi_to_en", "ccr_multi_to_multi", "ddr")
# Display names of the multilingual methods, shared by the table and the plot
# so that both always show the same wording.
multilingual_method_labels <- c(
    mfd = "MFD",
    ccr_multi_to_en = "CCR (engl reference, multi. embedding)",
    ccr_multi_to_multi = "CCR (multi. reference, multi. embedding)",
    ddr = "DDR"
)

for (correlation_measure in correlation_measurements) {
    # Per-method correlation tables, stacked and tagged with the method id.
    full_data <- do.call(rbind, lapply(methods, function(method_name) {
        method_table <- read_feather(here("data", "for_graphs", "multi", paste0("correlations_", method_name, "_", correlation_measure, ".feather")))
        method_table$method <- method_name
        method_table
    }))

    # Presentation-ready labels; languages other than es/nl/de become NA.
    full_data <- full_data |>
        mutate(
            foundation = stringr::str_to_title(foundation),
            pole = stringr::str_to_title(pole),
            language = case_match(
                language,
                "es" ~ "Spanish",
                "nl" ~ "Dutch",
                "de" ~ "German",
            )
        ) |>
        mutate(
            foundation = factor(foundation, levels = c("Care", "Fairness", "Loyalty", "Authority", "Sanctity")),
            pole = factor(pole, levels = c("Virtue", "Vice"))
        )

    # Appendix table: rounded correlations with formatted p-values. Rows with
    # any missing value are dropped by na.omit().
    full_data |>
        mutate(
            method = unname(multilingual_method_labels[method]),
            correlation = round(
                correlation,
                digits = 2
            ),
            pvalue = case_when(
                pvalue < 0.001 ~ "<.001",
                .default = as.character(round(pvalue, digits = 3))
            )
        ) |>
        rename_with(str_to_title, !starts_with("pvalue")) |>
        select(Method, Language, Foundation, Pole, Correlation, pvalue) |>
        arrange(Method, Language, Foundation, Pole) |>
        na.omit() |>
        kable(
            format = "latex", booktabs = TRUE, longtable = TRUE, label = paste0("multilingual_appendix_", correlation_measure),
            caption = paste0("Correlations and p-values for multilingual data, (", str_to_title(correlation_measure), ")")
        ) |>
        collapse_rows(columns = 1:3, longtable_clean_cut = FALSE) |>
        kable_styling(latex_options = c("repeat_header")) |>
        save_kable(here("graphs", "tables", paste0("multilingual_correlations_", correlation_measure, ".tex")))

    # Beeswarm of the correlations per foundation; `method` still holds the
    # raw ids here, so the facets are relabelled through the lookup vector.
    multilingual_plot <- ggplot(full_data, aes(
        x = foundation,
        y = correlation,
        shape = pole,
        colour = language
    )) +
        geom_beeswarm(cex = 3, size = 2) +
        labs(
            x = "Foundation",
            y = "Correlation",
            color = "Language",
            shape = "Pole"
        ) +
        facet_wrap(~method, labeller = labeller(method = multilingual_method_labels))
    ggsave(here("graphs", "multi", paste0("correlations_", "all_", correlation_measure, ".pdf")),
        plot = multilingual_plot, width = 8, height = 5
    )
}

# data <- read_feather(here("data", "for_graphs", paste0("correlations_", method_name, ".feather"))) # "combined_data.feather"))read_feather(paste0(c("data/for_graphs/correlations_", method_name, ".feather")))
```


## Regression Analysis

```{r}

# Builds the regression data set: scored manifestos are linked, via the
# Party Facts id mapping, to CHES GAL-TAN scores and to ParlGov cabinet
# membership, and the result is written to current_working_data.feather.

# Party Facts mapping between the party ids of the external data sets.
partyfacts <- as.data.table(read.csv(here("data", "external", "partyfacts-external-parties.csv")))

# ParlGov cabinets: keep only the columns needed and parse dates / flags.
parlgov_data <- as.data.table(read.csv(here("data", "external", "parlgov_cabinet.csv")))
parlgov_data <- parlgov_data[, .(election_date, cabinet_party, party_id, start_date)]
parlgov_data[, election_date := as.Date(election_date, format = "%Y-%m-%d")]
parlgov_data[, start_date := as.Date(start_date, format = "%Y-%m-%d")]
parlgov_data[, cabinet_party := as.logical(cabinet_party)]

setkey(parlgov_data, party_id)
parlgov_data

# Translate ParlGov party ids into Party Facts ids (inner join).
partyfacts_parlgov <- partyfacts[dataset_key == "parlgov", .(partyfacts_id, dataset_party_id)]
partyfacts_parlgov[, dataset_party_id := as.integer(dataset_party_id)]
setkey(partyfacts_parlgov, dataset_party_id)
partyfacts_parlgov

parlgov_data <- parlgov_data[partyfacts_parlgov, nomatch = 0]
parlgov_data$party_id <- NULL
setkey(parlgov_data, partyfacts_id)
parlgov_data

# Scored manifesto data, reduced to one row per manifesto.
regression_data <- as.data.table(read_feather(here("data", "for_graphs", "regression", "scored_data.feather")))
regression_data <- unique(regression_data, by = "manifesto_id")
regression_data[, party := as.integer(party)]
setkey(regression_data, party)
regression_data
# regression_data[, date := as.integer(date)]
# NOTE(review): presumably `date` is a year-month code (YYYYMM), so appending
# "01" gives the first day of that month - confirm against the corpus.
regression_data[, manifesto_year := as.Date(paste0(date, "01"), "%Y%m%d")]

# Translate Manifesto Project party ids into Party Facts ids (inner join).
partyfacts_manifesto <- partyfacts[dataset_key == "manifesto", .(partyfacts_id, dataset_party_id)]
partyfacts_manifesto[, dataset_party_id := as.integer(dataset_party_id)]
setkey(partyfacts_manifesto, dataset_party_id)

regression_data <- partyfacts_manifesto[regression_data, nomatch = 0]
regression_data$dataset_party_id <- NULL
setkey(regression_data, partyfacts_id)

# Attach the CHES party id to every manifesto (inner join on partyfacts_id).
partyfacts_ches <- partyfacts[dataset_key == "ches", .(partyfacts_id, dataset_party_id)]
partyfacts_ches[, dataset_party_id := as.integer(dataset_party_id)]
# ches_mapping_extra_la <- as.data.table(read.csv(here("data", "external", "mapping_ches_la.csv"), sep= ";"))
# ches_mapping_extra_la <- ches_mapping_extra_la[, .(party_id, partyfacts)]
# setnames(ches_mapping_extra_la, c("partyfacts", "party_id"), c("partyfacts_id", "dataset_party_id"))
# partyfacts_ches <- rbind(partyfacts_ches, ches_mapping_extra_la)
setkey(partyfacts_ches, partyfacts_id)

regression_nearly <- regression_data[partyfacts_ches, nomatch = 0]

# CHES GAL-TAN scores, dated to 1 January of the survey year.
ches_data <- as.data.table(read.csv(here("data", "external", "ches.csv")))
ches_data[, ches_id := as.integer(party_id)]
ches_data <- ches_data[, .(year, ches_id, galtan)]

# paste0() keeps the string free of the blank that paste() would insert, so
# it matches the "%Y%m%d" format exactly.
ches_data[, ches_year := as.Date(paste0(year, "0101"), format = "%Y%m%d")]
ches_data[, roll_year := ches_year]

ches_data <- ches_data[, .(ches_id, galtan, ches_year, roll_year)]
setkey(ches_data, ches_id, roll_year)

# CHES Latin America scores; every row is dated 2020-01-01.
ches_la_data <- as.data.table(read.csv(here("data", "external", "ches_la.csv")))
ches_la_data[, ches_id := as.integer(party_id)]
ches_la_data <- ches_la_data[, .(ches_id, galtan)]
ches_la_data[, ches_year := as.Date("20200101", format = "%Y%m%d")]
ches_la_data[, roll_year := ches_year]
setkey(ches_la_data, ches_id, roll_year)

ches_data_complete <- rbind(ches_data, ches_la_data)
setkey(ches_data_complete, ches_id, roll_year)

regression_nearly[, roll_year := manifesto_year]
setnames(regression_nearly, c("dataset_party_id"), c("ches_id"))
setkey(regression_nearly, ches_id, roll_year)
nrow(regression_nearly)


# Rolling join with roll = -Inf: a manifesto without a CHES entry on its exact
# date receives the party's next later CHES entry.
regression_plus_galtan <- ches_data_complete[regression_nearly, roll = -Inf]
regression_plus_galtan
# filter out manifestos past 2020 because there is no CHES after
regression_plus_galtan <- regression_plus_galtan[!is.na(ches_year)]
regression_plus_galtan

setkey(regression_plus_galtan, partyfacts_id, roll_year)
# Copy of the cabinet start date, created before `start_date` is used as a
# join key below.
parlgov_data[, start_date_2 := start_date]
setkey(parlgov_data, partyfacts_id, start_date)

# Rolling join with roll = Inf: each manifesto receives the party's most
# recent cabinet entry at or before the manifesto date; manifestos without
# any such entry are dropped.
regression_with_govt <- parlgov_data[regression_plus_galtan, roll = Inf]
regression_with_govt <- regression_with_govt[!is.na(cabinet_party)]
regression_with_govt

# Distance between the matched CHES entry and the manifesto (Date difference).
regression_with_govt[, diff_year := ches_year - manifesto_year]

# Keep manifestos whose matched CHES entry lies less than 2 * 366 days ahead.
filtered_data <- regression_with_govt[diff_year < 2 * 366, ]
filtered_data[, year := year(manifesto_year)]
nrow(filtered_data)
# View(test2[diff_year < 2*365, .(partyname, ches_year, manifesto_year)])

write_feather(filtered_data, here("data", "for_graphs", "regression", "current_working_data.feather"))
```

```{r}
# Fits one multilevel model per method x foundation x pole, checks the model
# assumptions, and collects the standardised GAL-TAN coefficient with its
# Wald confidence interval in `overall_results`.
poles <- c("virtue", "vice")
dimensions <- c("sanctity", "authority", "loyalty", "care", "fairness")
overall_results <- data.frame(
    foundation = character(),
    pole = character(),
    method = character(),
    coef = double(),
    ci_low = double(),
    ci_high = double()
)
methods <- c("mfd", "mfd2", "emfd", "ddr", "ccr")

# TRUE when the correlation test between x and y is significant at `alpha`.
is_significantly_correlated <- function(x, y, alpha = 0.05) {
    cor.test(x, y)$p.value < alpha
}

# The prepared regression data is identical for every method: read it once.
regression_prepared <- as.data.table(read_feather(here("data", "for_graphs", "regression", "current_working_data.feather")))
setkey(regression_prepared, id_for_project)

for (method in methods) {
    print(method)
    current_data <- as.data.table(read_feather(here("data", "for_graphs", "same_method", paste0("data_id_", method, ".feather"))))
    setkey(current_data, id_for_project)
    calculation_data <- current_data[regression_prepared]
    for (dimension in dimensions) {
        print(dimension)
        for (pole in poles) {
            # A failed check skips only the current model (`next`). Leaving
            # the pole loop altogether would also discard the not yet tested
            # model for the other pole of the same foundation.
            dependent <- paste0(dimension, ".", pole)
            form_for_model <- paste0(dependent, "~ galtan + year  + sentiment_flair + govt + (1| countryname/manifesto_id)")
            model <- lmer(as.formula(form_for_model), data = calculation_data)
            # standardise model to get coefficients
            standardised_model <- datawizard::standardise(model)

            # https://www.learn-mlms.com/12-module-12.html
            ## Testing Assumptions
            calculation_data$l1resid <- residuals(standardised_model)
            # No correlation of level 1 predictors and residuals
            if (is_significantly_correlated(calculation_data$l1resid, calculation_data$galtan)) {
                print("Violating Assumption: Residuals correlate with GAL-TAN")
                next
            }
            if (is_significantly_correlated(calculation_data$l1resid, calculation_data$sentiment_flair)) {
                print("Violating Assumption: Residuals correlate with Sentiment")
                next
            }
            if (is_significantly_correlated(calculation_data$l1resid, calculation_data$govt)) {
                print("Violating Assumption: Residuals correlate with Government")
                next
            }

            # Ignoring the normality of residuals assumption because so huge?
            # Independence of level 2 residuals: one row per manifesto with
            # the manifesto-level mean of the dependent variable.
            data_for_test <- calculation_data |>
                group_by(manifesto_id) |>
                mutate(
                    dependent_mean = mean(!!sym(dependent))
                ) |>
                select(manifesto_id, galtan, dependent_mean) |>
                unique()
            # NOTE(review): with the nested term the grouping factor is
            # presumably named "manifesto_id:countryname", so `$manifesto_id`
            # relies on partial matching, and the assignment assumes that the
            # random effects come in the same row order as `data_for_test` -
            # confirm both.
            data_for_test$intercept_resid <- ranef(standardised_model)$manifesto_id[, 1]
            if (is_significantly_correlated(data_for_test$intercept_resid, data_for_test$galtan)) {
                print("Violating Assumption: Level 2 interecept residuals correlate with GAL-TAN")
                next
            }
            normality_result <- shapiro.test(data_for_test$intercept_resid)
            if (normality_result$p.value < 0.05) {
                print("Violating Assumption: Level 2 intercept residuals are not normal")
                next
            }
            # Number of rows per manifesto, used to expand the manifesto-level
            # residuals back to the rows of `calculation_data`.
            # NOTE(review): this assumes the rows of `calculation_data` are
            # grouped by manifesto in the same order - confirm.
            n_per_manifesto <- count(calculation_data, manifesto_id)[["n"]]
            calculation_data$intercept_resid <- rep(data_for_test$intercept_resid, times = n_per_manifesto)
            if (is_significantly_correlated(calculation_data$l1resid, calculation_data$intercept_resid)) {
                print("Violating Assumption: Level 2 intercept residuals not independent from level 1 residuals")
                next
            }
            # Use the full predictor name instead of the `$sentiment` prefix.
            if (is_significantly_correlated(calculation_data$sentiment_flair, calculation_data$intercept_resid)) {
                print("Violating Assumption: Level 2 intercept residuals correlate with Sentiment")
                next
            }
            if (is_significantly_correlated(calculation_data$govt, calculation_data$intercept_resid)) {
                print("Violating Assumption: Level 2 intercept residuals correlate with Government")
                next
            }
            # Singular fits are recorded with missing estimates.
            if (performance::check_singularity(standardised_model)) {
                overall_results <- add_row(
                    overall_results,
                    foundation = dimension,
                    pole = pole,
                    method = method,
                    coef = NA_real_,
                    ci_low = NA_real_,
                    ci_high = NA_real_
                )
                next
            }
            galtan_coef <- fixef(standardised_model)["galtan"]
            confidence_intervals <- confint(standardised_model, method = "Wald")
            overall_results <- add_row(
                overall_results,
                foundation = dimension,
                pole = pole,
                method = method,
                coef = as.double(galtan_coef),
                ci_low = unname(confidence_intervals["galtan", "2.5 %"]),
                ci_high = unname(confidence_intervals["galtan", "97.5 %"])
            )
        }
    }
}
arrow::write_feather(overall_results, here("data", "for_graphs", "regression", "results.feather"))
```

```{r}
# Coefficient plot of the stored regression results: one point with its
# confidence interval per method and foundation, one panel per pole.
regression_results <- arrow::read_feather("data/for_graphs/regression/results.feather")

# Presentation-ready labels with a fixed display order.
regression_plot_data <- regression_results |>
    mutate(
        Foundation = factor(
            stringr::str_to_title(foundation),
            levels = c("Care", "Fairness", "Loyalty", "Authority", "Sanctity")
        ),
        Pole = factor(stringr::str_to_title(pole), levels = c("Virtue", "Vice")),
        Method = case_match(
            method,
            "ddr" ~ "DDR",
            "ccr" ~ "CCR",
            "mfd2" ~ "MFD2",
            "mfd" ~ "MFD",
            "emfd" ~ "eMFD"
        )
    )

# Foundations are reversed on the y axis so that the first level is on top;
# points and intervals of the methods are dodged by the same width.
method_dodge <- position_dodge(width = 0.5)
regression_plot <- ggplot(
    regression_plot_data,
    aes(
        x = coef,
        y = reorder(Foundation, desc(Foundation)),
        colour = Method
    )
) +
    geom_point(size = 3, position = method_dodge) +
    geom_linerange(aes(xmin = ci_low, xmax = ci_high), position = method_dodge) +
    geom_vline(xintercept = 0, linetype = "dotted", linewidth = 1) +
    facet_wrap(~Pole) +
    labs(
        x = "Standardised Regression Coefficient of GAL-TAN CHES score (Wald CI)",
        y = "Foundation"
    )
regression_plot
ggsave(
    here("graphs", "regression", "regression_plot.pdf"),
    plot = regression_plot,
    width = 8,
    height = 10
)
```


## Appendix Material 

Correlation tables for every foundation across methods

```{r}
poles <- c("virtue", "vice")
foundations <- c("authority", "care", "fairness", "loyalty", "sanctity")
correlation_measures <- c("kendall", "spearman")

# Translate the method ids used in the same-category files into display names;
# ids that are not listed become NA.
recode_method_label <- function(raw_method) {
    case_match(
        as.character(raw_method),
        "mfd" ~ "MFD",
        "mfd2" ~ "MFD2",
        "emfd" ~ "eMFD",
        "ddr_all_en" ~ "DDR",
        "ccr_en_to_en" ~ "CCR",
        "moralbert" ~ "MoralBERT"
    )
}

# One cross-method correlation plot per correlation measure and foundation,
# with one panel per pole.
for (correlation_measure in correlation_measures) {
    for (foundation in foundations) {
        full_data <- do.call(rbind, lapply(poles, function(pole) {
            pole_table <- read_feather(here("data", "for_graphs", "same_category", paste0("correlations_", foundation, "_", pole, "_", correlation_measure, ".feather")))
            pole_table$pole <- pole
            pole_table
        }))
        # Long format: one row per pair of methods and pole.
        correlations <- reshape2::melt(as.data.frame(full_data), id.vars = c("index", "pole")) |>
            mutate(
                index = recode_method_label(index),
                variable = recode_method_label(variable),
                pole = str_to_title(pole)
            ) |>
            mutate(
                index = factor(index, levels = rev(methods_pretty_names), ordered = TRUE),
                variable = factor(variable, levels = rev(methods_pretty_names), ordered = TRUE)
            )
        corplot <- ggplot(data = correlations, aes(index, variable,
            fill = value
        )) +
            geom_tile(color = "white") +
            scale_fill_gradient2(
                midpoint = 0, limits = c(-1, 1), space = "Lab",
                name = paste0(str_to_title(correlation_measure), " Correlation")
            ) +
            labs(
                x = "", y = ""
            ) +
            theme(axis.text.x = element_text(
                angle = 45, vjust = 1,
                size = 12, hjust = 1
            )) +
            coord_fixed() +
            facet_wrap(~pole) +
            geom_text(aes(index, variable, label = round(value, digits = 2)), size = 3.5) +
            # drop = FALSE keeps every factor level on the axes, in level order.
            scale_x_discrete(drop = FALSE) +
            scale_y_discrete(drop = FALSE)

        ggsave(here("graphs", "same_category", paste0("same_category_", foundation, "_", correlation_measure, ".pdf")),
            plot = corplot, width = 12, height = 12
        )
    }
}

```

Descriptive table of the manifestos included in the regression analysis
```{r}
# LaTeX table listing every manifesto used in the regression, with the year
# of the manifesto and the year of the matched CHES entry.
data <- read_feather(here("data", "for_graphs", "regression", "current_working_data.feather"))

data |>
    distinct(countryname, partyname, manifesto_year, ches_year) |>
    mutate(
        ches_year = year(ches_year),
        manifesto_year = year(manifesto_year)
    ) |>
    arrange(desc(countryname), desc(partyname)) |>
    rename(
        "Country" = countryname,
        "Party" = partyname,
        "Year (Manifesto)" = manifesto_year,
        "Year (CHES)" = ches_year
    ) |>
    # No trailing comma here: an empty argument would be forwarded through
    # kable()'s `...`.
    kable(
        format = "latex", booktabs = TRUE, longtable = TRUE,
        caption = "Manifestos included in the regression analysis"
    ) |>
    collapse_rows(columns = 1:2) |>
    kable_styling(latex_options = c("repeat_header")) |>
    save_kable(here("graphs", "tables", "regression_descriptives.tex"))
# Print the available columns for reference.
colnames(data)
```